****************************************************************************************************************************
Date: 30.juli 2014
By: Sanni, xru

Purpose: Add additional variables. Check of data and prepare for analysis

Latest update: 27AUG 2014
****************************************************************************************************************************;

/* IN points at the folder that holds nicu_master_060814. */
libname in 'K:\Data\Workdata\702727\xru\NICU project\Fra Søren';

/* Take a WORK copy of the permanent master table. The next step reads this copy. */
data master;
    set in.nicu_master_060814;
run;


**************************************************
Tilpasser sample lidt til efterfølgende analyser
**************************************************;

/* Keep only observations with birthyear 1970 or later.
   A missing birthyear compares as smaller than any number in SAS,
   so observations with a missing birthyear are excluded as well. */
data sample1;
    set master;
    if birthyear >= 1970;
run;

/* Gestational age (ga) below 22 is treated as unrealistic and set to missing.
   The observation itself is kept - only the value is blanked out. */
data sample2;
    set sample1;
    if not (ga = .) and ga < 22 then ga = .;
run;

/* Gestational age (ga) above 45 is treated as unrealistic and set to missing.
   The observation itself is kept. */
data sample3;
    set sample2;
    if ga ne . and ga gt 45 then ga = .;
run;


/* Birth weight (bw) outside the range 250-6000 is treated as unrealistic
   and set to missing. The observation itself is kept. */
data sample4;
    set sample3;
    if bw < 250 or bw > 6000 then bw = .;
run;

/*
*Dropper hvis mere end 10 børn;
data sample5;
set sample4;

if family_size>10 then delete;
run;
*/


/* Recode APGAR: values above 10 are set to missing.
   Reads sample4 because the family-size filter that would have produced
   sample5 is commented out.
   Note: the original statement-style comment here contained an unmatched
   double quote, which SAS does not allow in that comment form. */
data sample6;
    set sample4;
    if APGAR > 10 then APGAR = .;
run;

/* Build missing-value indicators for parental education and marital status,
   then recode the missing values themselves to 0. */
data sample7;
    set sample6;

    /* Education indicators (1 = missing, 0 = observed).
       They are derived from basic_edu_mom / basic_edu_dad only and must be
       computed before those variables are recoded below. */
    mom_educ_mis = (basic_edu_mom = .);
    dad_educ_mis = (basic_edu_dad = .);

    /* Education variables: a missing value becomes 0. */
    if basic_edu_mom eq . then basic_edu_mom = 0;
    if hsvoc_edu_mom eq . then hsvoc_edu_mom = 0;
    if college_mom   eq . then college_mom   = 0;
    if basic_edu_dad eq . then basic_edu_dad = 0;
    if hsvoc_edu_dad eq . then hsvoc_edu_dad = 0;
    if college_dad   eq . then college_dad   = 0;

    /* Married at birth: indicator first, then recode missing to 0. */
    married_birth_mis = (married_birth = .);
    if married_birth eq . then married_birth = 0;

    /* Married or cohabiting at birth: indicator first, then recode missing to 0. */
    married_cohab_birth_mis = (married_cohab_birth = .);
    if married_cohab_birth eq . then married_cohab_birth = 0;
run;

/* Record which parental ages are missing (1 = missing, 0 = observed).
   This is done before the mean imputation later in the program, which
   overwrites the missing ages. */
data sample8;
    set sample7;
    age_mom_mis = (age_mom = .);
    age_dad_mis = (age_dad = .);
run;

/* Missing parental age is (for now) set equal to the sample mean.
   Step 1: attach the overall mean of age_mom to every row.
   There is no GROUP BY, so PROC SQL remerges the summary statistic
   with the detail rows. */
proc sql;
    create table sample8_impute as
    select s.*,
           mean(s.age_mom) as mean_age_mom
    from sample8 as s;
quit;


/* Step 2 of the mean imputation: attach the overall mean of age_dad to
   every row. There is no GROUP BY, so PROC SQL remerges the summary
   statistic with the detail rows. */
proc sql;
    create table sample9 as
    select s.*,
           mean(s.age_dad) as mean_age_dad
    from sample8_impute as s;
quit;


/* Replace missing parental ages with the sample means that are carried on
   every row as mean_age_mom and mean_age_dad. */
data sample10;
    set sample9;
    if age_mom eq . then age_mom = mean_age_mom;
    if age_dad eq . then age_dad = mean_age_dad;
run;


/************************************************************
Drop the variables that are not needed going forward for now,
including the helper columns mean_age_mom and mean_age_dad.
*************************************************************/
data sample11;
    set sample10;

    drop Dansk_s_prove1
         priv_care_age5
         cen_care_age5
         private_age2
         centralized_age2
         Notreg_age2
         age_first_care
         mean_age_mom
         mean_age_dad
    ;
run;

/* Sort sample11 in place by pnrm and, within pnrm, by birthyear. */
proc sort data = sample11;
    by pnrm birthyear;
run;

/* OUT is the library that receives the finished dataset. */
libname out 'K:\Data\Workdata\702727\xru\NICU project\Datasæt';

/* Write the prepared sample to OUT.NICU_SAMLET. */
data out.nicu_samlet;
    set sample11;
run;
